************************************************************
*****     Create final event study sample		       *****
************************************************************

/* CONTENTS:
- Create outcome variables and controls by sample
- Final cleaning for summary statistics
- Create Analytical Datasets and Samples Corresponding to Main Regressions
- Define DiD Hospitalization Sample
- Transform data to run event study
*/

*********************************************************
* Create Outcome Variables and Controls by Sample 
*********************************************************
*    
* Time the whole outcome-construction loop with timer 1
timer clear 1
timer on 1
set trace off

* Output-file suffixes and the treatment-assignment files they are built from.
* The two lists are matched by position.
global names "2 3 4 4_week 4_month _all"  
global datasets "tf_all_2 tf_all_3 tf_all_4 tf_all_week_4 tf_all_month_4 sample_healthshock"

local N : word count $names
forvalues i = 1/`N' {

	* Pick the i-th suffix and the i-th input file
	local name : word `i' of $names
	local dataset : word `i' of $datasets

	* File used to link fathers to mothers
	local matchdata "sample_healthshock"

	* The sixth entry (suffix _all, built from sample_healthshock) is the
	* event-study sample: it carries no treatment-assignment variables.
	* All other entries are DiD samples and keep them.
	local eventstudy = (`i' == 6)
	local did_vars
	if `eventstudy' == 0 {
		local did_vars treat ageinptreat ageinphospitalnew year_treat
	}

	* Set max number of years ahead of the event we'd like to study
	local max_time 10

	{ // Main control and outcome creation 
		{ // * MOTHERS *
		* Build the mother-level outcome file for the current sample.
		* Uses locals set by the enclosing loop: eventstudy, dataset, did_vars,
		* max_time and name.
		* For every outcome two sets of variables are created relative to the
		* event year yearinph:
		*   STUB_year0 ... STUB_yearK      event year and the K years after
		*   STUB_yearminus1 ... yearminus5 the five years before (pretrends)
		* A cell is filled only when the calendar year it refers to lies inside
		* the window covered by the yearly source variables.

		* Merge individual information with treatment assignment
		use "$processed_data\Income_mom.dta", clear
		duplicates drop lopenr_mor, force /*only missings*/
		cap drop _merge
		if `eventstudy' != 1 {
			* DiD samples: a mother can appear more than once in the assignment file
			merge 1:m lopenr_mor using "$processed_data/`dataset'.dta"
			drop if _merge==1
			drop _merge
			* Year of treatment (treatment year, or placebo year for control units)
			g year_treat = child_b_year + ageinptreat
			g yearinph = year_treat
		}
		else {
			* Event-study sample: one row per mother
			merge 1:1 lopenr_mor using "$processed_data/`dataset'.dta"
			drop if _merge==1
			drop _merge
			* capture: the command is allowed to fail without stopping the run
			cap g yearinph = year(first_inphospital)
		}

		* INCOME, EMPLOYMENT, TRANSFERS and TOTAL INCOME before/after hospitalization
		* Output stubs and the yearly source variables they read, matched by position.
		* Event years run 2003-2014 for every outcome. The earnings loop used to
		* stop at 2013, which left income_year0 missing for mothers whose event
		* year is 2014 although all other outcomes were filled for them.
		local reg_stubs income working transfers totalincome
		local reg_sources income employed transfers totalincome
		forvalues k = 1/4 {
			local stub : word `k' of `reg_stubs'
			local src : word `k' of `reg_sources'

			** Event year and following years
			forvalues x = 0/`max_time' {
				g `stub'_year`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 0/`max_time' {
					local year = `y' + `x'
					if `year' <= 2014 {
						replace `stub'_year`x' = `src'`year' if yearinph == `y'
					}
				}
			}

			** 5 years before (to check pretrends)
			forvalues x = 1/5 {
				g `stub'_yearminus`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 1/5 {
					local year = `y' - `x'
					if `year' >= 2003 {
						replace `stub'_yearminus`x' = `src'`year' if yearinph == `y'
					}
				}
			}
		}

		* EDUCATION in the event year
		merge m:1 lopenr_mor using "$processed_data/Educ_mom.dta"
		drop if _merge==2
		drop _merge

		** Education level is the first character of the yearly BU code;
		** the university indicator is left missing when the level is missing or 9
		forvalues y = 1992/2014 {
			g educ`y' = substr(BU`y',1,1)
			destring educ`y', replace
			gen uni_educ`y' = (educ`y'>=6) if educ`y'!=. & educ`y'!=9
		}
		** Level of education in the year of first hospital admission
		gen educ0 = .
		gen uni_educ0 = .
		forvalues y = 1992/2014 {
			replace educ0 = educ`y' if yearinph == `y'
			replace uni_educ0 = uni_educ`y' if yearinph == `y'
		}

		* PROBABILITY of DIVORCE, MARRIED and UNMARRIED
		merge m:1 lopenr_mor using "$processed_data/Marital_status_mom.dta"
		drop if _merge==2
		drop _merge

		** Each status is recoded to 0/1 with ==1, so a missing yearly status becomes 0
		foreach status in divorced married unmarried {

			** Event year and following years
			forvalues x = 0/`max_time' {
				g `status'_year`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 0/`max_time' {
					local year = `y' + `x'
					if `year' <= 2014 {
						replace `status'_year`x' = (`status'`year'==1) if yearinph == `y'
					}
				}
			}

			** 5 years before (to check pretrends)
			forvalues x = 1/5 {
				g `status'_yearminus`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 1/5 {
					local year = `y' - `x'
					if `year' >= 2003 {
						replace `status'_yearminus`x' = (`status'`year'==1) if yearinph == `y'
					}
				}
			}
		}

		* MENTAL HEALTH before/after hospitalization
		merge m:1 lopenr_mor using "$processed_data/Mental_health_mom.dta"
		drop if _merge==2
		drop _merge

		** A missing yearly diagnosis count is set to zero
		forvalues y = 2006/2014 {
			replace n_diag_year`y' = 0 if n_diag_year`y'==.
		}

		** Event year and the four following years; source years are 2006-2014
		forvalues x = 0/4 {
			g mentalhealthyear`x' = .
		}
		forvalues y = 2006/2014 {
			forvalues x = 0/4 {
				local year = `y' + `x'
				if `year' <= 2014 {
					replace mentalhealthyear`x' = n_diag_year`year' if yearinph == `y'
				}
			}
		}

		** 5 years before (to check pretrends)
		forvalues x = 1/5 {
			g mentalhealthyearminus`x' = .
		}
		forvalues y = 2006/2014 {
			forvalues x = 1/5 {
				local year = `y' - `x'
				if `year' >= 2006 {
					replace mentalhealthyearminus`x' = n_diag_year`year' if yearinph == `y'
				}
			}
		}

		* Keep ids, treatment-assignment variables (DiD samples only) and outcomes
		keep `did_vars' lopenr_mor income_year* working_year* transfers_year* totalincome_year* /// 
		educ0 uni_educ0 married_year* unmarried_year* divorced_year* mentalhealthyear*
		lab var lopenr_mor "mother id"

		save "$processed_data\mothers_income_diagnosis`name'.dta", replace 
		}

		
		{ // * FATHERS *
		* Build the father-level outcome file for the current sample.
		* Uses locals set by the enclosing loop: eventstudy, dataset, matchdata,
		* did_vars, max_time and name.
		* Outcomes are laid out relative to the event year yearinph, as
		* STUB_year0 ... STUB_yearK and STUB_yearminus1 ... STUB_yearminus5.

		* Merge individual information with treatment assignment
		use "$processed_data\Income_dad.dta", clear 
		cap drop _merge
		duplicates drop lopenr_far, force
		** Match fathers with data on childhood shock
		merge 1:m lopenr_far using "$processed_data/`matchdata'.dta"
		drop if _merge==1
		drop _merge
		duplicates drop child_b_year lopenr_mor, force
		if `eventstudy' != 1 {
			* DiD samples: attach the treatment assignment through the mother id
			merge 1:m lopenr_mor using "$processed_data/`dataset'.dta" 
			drop if _merge==1
			drop _merge
			** Year of treatment (treatment year, or placebo year for control units)
			g year_treat = child_b_year + ageinptreat
			g yearinph = year_treat
		}
		else {
			* capture: the command is allowed to fail without stopping the run
			cap g yearinph = year(first_inphospital)
		}

		* INCOME, EMPLOYMENT, TRANSFERS and TOTAL INCOME before/after hospitalization
		* Output stubs and the yearly source variables they read, matched by position
		local out_names father_income father_working father_transfers f_totalincome
		local src_names income employed transfers totalincome
		forvalues k = 1/4 {
			local out : word `k' of `out_names'
			local src : word `k' of `src_names'

			** Event year and following years
			forvalues x = 0/`max_time' {
				g `out'_year`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 0/`max_time' {
					local year = `y' + `x'
					if `year' <= 2014 {
						replace `out'_year`x' = `src'`year' if yearinph == `y'
					}
				}
			}

			** 5 years before (to check pretrends)
			forvalues x = 1/5 {
				g `out'_yearminus`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 1/5 {
					local year = `y' - `x'
					if `year' >= 2003 {
						replace `out'_yearminus`x' = `src'`year' if yearinph == `y'
					}
				}
			}
		}

		* EDUCATION in the event year
		merge m:1 lopenr_far using "$processed_data/Educ_dad.dta" 
		drop if _merge==2
		drop _merge

		** Education level is the first character of the yearly BU code;
		** the university indicator is left missing when the level is missing or 9
		forvalues y = 1992/2014 {
			g father_educ`y' = substr(BU`y',1,1)
			destring father_educ`y', replace
			gen father_uni_educ`y' = (father_educ`y'>=6) if father_educ`y'!=. & father_educ`y'!=9
		}
		** Level of education in the year of first hospital admission
		gen father_educ0 = .
		gen father_uni_educ0 = .
		forvalues y = 1992/2014 {
			replace father_educ0 = father_educ`y' if yearinph == `y'
			replace father_uni_educ0 = father_uni_educ`y' if yearinph == `y'
		}

		* PROBABILITY of DIVORCE, MARRIED and UNMARRIED
		merge m:1 lopenr_far using "$processed_data/Marital_status_dad.dta" 
		drop if _merge==2
		drop _merge

		** NOTE(review): divorced is copied as stored, so a missing yearly value
		** stays missing, while married and unmarried are recoded with ==1, so a
		** missing yearly value becomes 0. The mothers block recodes all three
		** with ==1. Confirm whether the difference is intended.
		foreach status in divorced married unmarried {

			** Event year and following years
			forvalues x = 0/`max_time' {
				g father_`status'_year`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 0/`max_time' {
					local year = `y' + `x'
					if `year' <= 2014 {
						if "`status'" == "divorced" {
							replace father_`status'_year`x' = `status'`year' if yearinph == `y'
						}
						else {
							replace father_`status'_year`x' = (`status'`year'==1) if yearinph == `y'
						}
					}
				}
			}

			** 5 years before (to check pretrends)
			forvalues x = 1/5 {
				g father_`status'_yearminus`x' = .
			}
			forvalues y = 2003/2014 {
				forvalues x = 1/5 {
					local year = `y' - `x'
					if `year' >= 2003 {
						if "`status'" == "divorced" {
							replace father_`status'_yearminus`x' = `status'`year' if yearinph == `y'
						}
						else {
							replace father_`status'_yearminus`x' = (`status'`year'==1) if yearinph == `y'
						}
					}
				}
			}
		}

		* MENTAL HEALTH before/after hospitalization
		merge m:1 lopenr_far using "$processed_data/Mental_health_dad.dta"
		drop if _merge==2 // drop diagnosis records that match no father in the current data
		drop _merge

		** A missing yearly diagnosis count is set to zero
		forvalues y = 2006/2014 {
			replace n_diag_year`y' = 0 if n_diag_year`y'==.
		}

		** Event year and the four following years; source years are 2006-2014
		forvalues x = 0/4 {
			g mentalhealthdadyear`x' = .
		}
		forvalues y = 2006/2014 {
			forvalues x = 0/4 {
				local year = `y' + `x'
				if `year' <= 2014 {
					replace mentalhealthdadyear`x' = n_diag_year`year' if yearinph == `y'
				}
			}
		}

		** 5 years before (to check pretrends)
		forvalues x = 1/5 {
			g mentalhealthdadyearminus`x' = .
		}
		forvalues y = 2006/2014 {
			forvalues x = 1/5 {
				local year = `y' - `x'
				if `year' >= 2006 {
					replace mentalhealthdadyearminus`x' = n_diag_year`year' if yearinph == `y'
				}
			}
		}

		* Keep ids, treatment-assignment variables (DiD samples only) and outcomes
		keep `did_vars' lopenr_far lopenr_mor father_income_year* ///
		father_working* father_transfers_year* f_totalincome_year* ///
		father_educ0 father_uni_educ0 father_married_year* father_unmarried_year* ///
		father_divorced_year* mentalhealthdadyear*

		save "$processed_data/fathers_income_diagnosis`name'.dta", replace

	}
}

}

* Stop and report timer 1 once, after every sample has been processed.
* Timer 1 is switched on before the loop starts, so switching it off inside
* the loop body would only time the first sample.
timer off 1
timer list
*********************************************************
* Final Cleaning for Summary Statistics
*********************************************************
* Files that receive summary-statistic variables and labels
global datasets_mother "mothers_income_diagnosis4 mothers_income_diagnosis_all"
global datasets_father "fathers_income_diagnosis4 fathers_income_diagnosis_all"

* Mothers: add education-level indicators and variable labels for the t=-2
* outcomes, then overwrite each file in place.
foreach dataset of global datasets_mother {
	use "$processed_data/`dataset'.dta", clear 

	** One indicator per education level 3 to 8, equal to 0 when educ0 is missing
	foreach level of numlist 3/8 {
		generate educ0_`level' = (educ0 == `level')
	}
	label variable educ0_3 "Upper secondary, basic educ. level"
	label variable educ0_4 "Upper secondary, final year"
	label variable educ0_5 "Post secondary non-tertiary"
	label variable educ0_6 "Bachelor's or equivalent level"
	label variable educ0_7 "Master's or equivalent level"
	label variable educ0_8 "Doctoral or equivalent level"

	** Any mental health visit two years before the event
	** capture: an already existing variable does not stop the run
	capture generate mentalhealthyearminus2_any = (mentalhealthyearminus2 > 0) if !missing(mentalhealthyearminus2)

	** Labels for outcomes measured two years before the event
	label variable income_yearminus2 "Earnings t=-2"
	label variable working_yearminus2 "Prob. working t=-2"
	label variable totalincome_yearminus2 "Total earnings t=-2"
	label variable transfers_yearminus2 "Transfers t=-2"
	label variable mentalhealthyearminus2 "N visits mental health t-2"
	label variable mentalhealthyearminus2_any "Prob. mental health visit t-2"
	label variable divorced_yearminus2 "Prob. divorced t-2"
	label variable married_yearminus2 "Prob. married t-2"
	label variable unmarried_yearminus2 "Prob. unmarried t-2"

	save "$processed_data/`dataset'.dta", replace 
}

* Fathers: add education-level indicators and variable labels for the t=-2
* outcomes, then overwrite each file in place.
* No labels are set here for the married and unmarried outcomes.
foreach dataset of global datasets_father {
	use "$processed_data/`dataset'.dta", clear

	** One indicator per education level 3 to 8, equal to 0 when father_educ0 is missing
	foreach level of numlist 3/8 {
		generate father_educ0_`level' = (father_educ0 == `level')
	}
	label variable father_educ0_3 "Upper secondary, basic educ. level"
	label variable father_educ0_4 "Upper secondary, final year"
	label variable father_educ0_5 "Post secondary non-tertiary"
	label variable father_educ0_6 "Bachelor's or equivalent level"
	label variable father_educ0_7 "Master's or equivalent level"
	label variable father_educ0_8 "Doctoral or equivalent level"

	** Any mental health visit two years before the event
	** capture: an already existing variable does not stop the run
	capture generate mentalhealthdadyearminus2_any = (mentalhealthdadyearminus2 > 0) if !missing(mentalhealthdadyearminus2)

	** Labels for outcomes measured two years before the event
	label variable father_income_yearminus2 "Earnings t=-2"
	label variable father_working_yearminus2 "Prob. working t=-2"
	label variable f_totalincome_yearminus2 "Total earnings t=-2"
	label variable father_transfers_yearminus2 "Transfers t=-2"
	label variable mentalhealthdadyearminus2 "N visits mental health t-2"
	label variable mentalhealthdadyearminus2_any "Prob. mental health visit t-2"
	label variable father_divorced_yearminus2 "Prob. divorced t-2"

	save "$processed_data/`dataset'.dta", replace 

}

*********************************************************
* Create Analytical Datasets and Samples Corresponding to Main Regressions
*********************************************************
* Row filter for the DiD sample: both parents' earnings, treatment status,
* parental ages, calendar year, child birth year, both education levels and
* child sex must be non-missing, and yearinp must lie in 2008-2014.
global sample_did !missing(father_income) ///
& !missing(income) ///
& !missing(treat) ///
& !missing(age_mother) ///
& !missing(age_father) ///
& !missing(year_calendar) ///
& !missing(child_b_year) ///
& !missing(educ0) ///
& !missing(father_educ0) ///
& !missing(male) ///
& inrange(yearinp,2008,2014)

/*
Author note on re-merging: there should not be any issues, because each mother
has a unique value of yearinp. This is not the case for ageinptreat, where a
mother can have up to 2 values: one for her actual year of treatment and one
for the year of treatment for which she acts as a control (if she also acts
as a control).
*/

* Row filter for the event-study sample: both parents' earnings and calendar
* year non-missing, age at first hospitalization in 6-18, yearinp in 2008-2014.
global sample_event !missing(father_income) ///
& !missing(income) ///
& !missing(year_calendar) ///
& inrange(age_firstinphospital,6,18) ///
& inrange(yearinp,2008,2014)

*****************************
* DiD Hospitalization Sample
*****************************
* Build the list of mothers who enter the DiD hospitalization sample.
* The parent outcome files are reshaped to one row per unit and event time,
* the sample_did filter is applied, and one row per surviving mother is saved
* to sample.dta with sample_hospital equal to 1. The event-time dummies and
* interactions are not saved; the two globals defined here stay in memory.
use "$processed_data\sample_healthshock.dta", clear 
merge 1:m lopenr_mor using "$processed_data\mothers_income_diagnosis4", keep(3) nogen 
merge 1:1 lopenr_mor treat using "$processed_data\fathers_income_diagnosis4", keep(3) nogen

keep yearinp lopenr_mor lopenr_far age_mother_inp age_father_inp income_* ///
father_income_* educ0 child_b_year father_educ0 male treat ageinptreat year_treat 

* Reshape to long format; the reshape id is the mother and treatment status pair
egen new_shnromother = group(lopenr_mor treat)
reshape long income_ father_income_, ///
i(new_shnromother) j(year_event) string
drop new_shnromother
rename income_ income
rename father_income_ father_income

* Numeric event time: -5 to 3. Rows for year4 and later keep a missing value.
generate time_event = .
forvalues k = 0/3 {
	replace time_event = `k' if year_event == "year`k'"
}
forvalues k = 1/5 {
	replace time_event = -`k' if year_event == "yearminus`k'"
}

* Calendar year of the income observation
generate year_calendar = year_treat + time_event

* Parents' birth years, then their age in each calendar year
generate mother_b_year = year_treat - age_mother_inp
generate father_b_year = year_treat - age_father_inp
generate age_mother = year_calendar - mother_b_year
generate age_father = year_calendar - father_b_year

* Event-time dummies. As labelled further down, i_time_even_1 to i_time_even_5
* are 5 to 1 years before, i_time_even_6 is the event year and i_time_even_7
* to i_time_even_9 are 1 to 3 years after.
* The list leaves out i_time_even_4 (time_event -2) as the omitted category.
xi i.time_event,prefix("i_") noomit
global event_dummies i_time_even_1 i_time_even_2 i_time_even_3 i_time_even_5 ///
i_time_even_6 i_time_even_7 i_time_even_8 i_time_even_9

* Interactions with treatment, each labelled with its event time (-5 to 3)
forvalues k = 1/9 {
	local shock_time = `k' - 6
	generate even`k'_treat = i_time_even_`k'*treat
	label variable even`k'_treat "`shock_time'"
}

* The list leaves out even4_treat (time_event -2), matching the dummies above
global event_treat_dummies even1_treat even2_treat even3_treat even5_treat ///
even6_treat even7_treat even8_treat even9_treat

* Labels for the event-time dummies
forvalues k = 1/5 {
	local yrs = 6 - `k'
	label variable i_time_even_`k' "`yrs' years before hospitalization"
}
label variable i_time_even_6 "Year of hospitalization" 
forvalues k = 7/9 {
	local yrs = `k' - 6
	label variable i_time_even_`k' "`yrs' years after hospitalization" 
}

* Apply the sample filter and keep one row per mother
keep if $sample_did
generate sample_hospital = 1 
duplicates drop lopenr_mor, force

keep lopenr_mor sample_hospital
save "$processed_data\sample.dta", replace 

* Attach the DiD sample flag to sample_healthshock.dta and overwrite that file.
* Mothers absent from sample.dta get sample_hospital equal to 0.
use "$processed_data\sample_healthshock.dta", clear 
* On a re-run the file already carries sample_hospital from the previous run.
* merge keeps the master copy of a variable present on both sides, so the old
* flag has to be dropped first or the new one would be ignored.
capture drop sample_hospital
merge 1:1 lopenr_mor using "$processed_data\sample.dta"
drop _merge 
replace sample_hospital = 0 if sample_hospital == .
tab sample_hospital, m 
save "$processed_data\sample_healthshock.dta", replace

*****************************
* Event Study Hospitalization
*****************************
* Build the list of mothers who enter the event-study hospitalization sample.
* The parent outcome files are reshaped to one row per mother and event time,
* the sample_event filter is applied, rows without a father id are dropped,
* and one row per surviving mother is saved to sample.dta with
* sample_hospital_event equal to 1.
use "$processed_data\sample_healthshock.dta", clear
** Mothers' outcomes, then fathers' outcomes; only matched rows are kept
merge 1:1 lopenr_mor using "$processed_data\mothers_income_diagnosis_all", keep(3) nogen
merge 1:1 lopenr_mor using "$processed_data\fathers_income_diagnosis_all", keep(3) nogen

keep yearinp lopenr_mor lopenr_far ///
income_* father_income_* age_firstinphospital

reshape long income_ father_income_,i(lopenr_mor) j(year_event) string
rename income_ income
rename father_income_ father_income

** Numeric event time (health shock year is 0): -5 to 3.
** Rows for year4 and later keep a missing value.
generate time_event = .
forvalues k = 0/3 {
	replace time_event = `k' if year_event == "year`k'"
}
forvalues k = 1/5 {
	replace time_event = -`k' if year_event == "yearminus`k'"
}

** Calendar year of the income observation
generate year_calendar = yearinp + time_event

* Event-time dummies. As labelled below, i_time_even_1 to i_time_even_5 are
* 5 to 1 years before, i_time_even_6 is the event year and i_time_even_7 to
* i_time_even_9 are 1 to 3 years after.
* The list leaves out both i_time_even_1 (time_event -5) and
* i_time_even_4 (time_event -2).
xi i.time_event,prefix("i_") noomit
global event_dummies i_time_even_2 i_time_even_3 i_time_even_5 ///
i_time_even_6 i_time_even_7 i_time_even_8 i_time_even_9

* Labels for the event-time dummies
forvalues k = 1/5 {
	local yrs = 6 - `k'
	label variable i_time_even_`k' "`yrs' years before hospitalization"
}
label variable i_time_even_6 "Year of hospitalization"
forvalues k = 7/9 {
	local yrs = `k' - 6
	label variable i_time_even_`k' "`yrs' years after hospitalization" 
}

keep if $sample_event

** Numeric ids for individual fixed effects.
** mother_id and father_id are not part of the file saved below.
count if missing(lopenr_mor)
encode lopenr_mor, g (mother_id)
drop if missing(lopenr_far)
encode lopenr_far, g(father_id)

* Keep one row per mother
generate sample_hospital_event = 1 
duplicates drop lopenr_mor, force 
keep lopenr_mor sample_hospital_event
save "$processed_data\sample.dta", replace 

* Attach the event-study sample flag to sample_healthshock.dta and overwrite
* that file. Mothers absent from sample.dta get sample_hospital_event equal to 0.
use "$processed_data\sample_healthshock.dta", clear 
* On a re-run the file already carries sample_hospital_event from the previous
* run. merge keeps the master copy of a variable present on both sides, so the
* old flag has to be dropped first or the new one would be ignored.
capture drop sample_hospital_event
merge 1:1 lopenr_mor using "$processed_data\sample.dta"
drop _merge 
replace sample_hospital_event = 0 if sample_hospital_event == . 
tab sample_hospital_event, m 
save "$processed_data\sample_healthshock.dta", replace

